# Import the required packages
import numpy as np
from math import sqrt
from collections import Counter
def distance(k, X_train, Y_train, x):
    """Classify ``x`` by majority vote among its ``k`` nearest training samples.

    Despite its name, this function does not return a distance: it runs a
    k-nearest-neighbours classification using the Euclidean distance and
    returns the winning label.

    Args:
        k: Number of neighbours that take part in the vote; must satisfy
            ``1 <= k <= number of training samples``.
        X_train: 2-D array-like of shape ``(n_samples, n_features)``.
        Y_train: 1-D array-like with one label per row of ``X_train``.
        x: 1-D array-like of length ``n_features``, the sample to classify.

    Returns:
        The label (an element of ``Y_train``) that occurs most often among
        the ``k`` nearest samples. When several labels share the top count,
        the one whose first voter is closest to ``x`` wins.

    Raises:
        AssertionError: If ``k`` is out of range, if ``X_train`` and
            ``Y_train`` differ in length, or if ``x`` is not a 1-D vector
            with as many features as ``X_train``.
    """
    # Compute in float64: subtracting unsigned-integer features (e.g. uint8
    # pixel values) in their own dtype wraps around and corrupts distances.
    samples = np.asarray(X_train, dtype=float)
    query = np.asarray(x, dtype=float)
    n_samples = samples.shape[0]

    # Explicit raises rather than `assert` statements, so the validation is
    # still performed when Python runs with -O. AssertionError is kept so
    # existing callers that catch it continue to work.
    if not 1 <= k <= n_samples:
        raise AssertionError("K must be valid")
    if n_samples != np.shape(Y_train)[0]:
        raise AssertionError(
            "the size of X_train must equal to the size of y_train"
        )
    # A non-1-D query would broadcast against the sample matrix and yield
    # meaningless distances, so it is rejected together with a wrong length.
    if query.ndim != 1 or samples.shape[1] != query.shape[0]:
        raise AssertionError(
            "the feature number of x must be equal to X_train"
        )

    # Euclidean distance from the query to every training sample at once.
    dists = np.sqrt(np.sum((samples - query) ** 2, axis=1))

    # A stable sort keeps equidistant samples in training order, which makes
    # the choice of neighbours deterministic when distances tie.
    nearest = np.argsort(dists, kind="stable")

    # Labels of the k closest samples, nearest first.
    topk_y = [Y_train[i] for i in nearest[:k]]

    # Majority vote over those labels.
    votes = Counter(topk_y)
    return votes.most_common(1)[0][0]
if __name__ == "__main__":
    # Demo run: eight 2-D training points, one per row.
    X_train = np.array(
        [
            [1.0, 3.5],
            [2.0, 7.0],
            [3.0, 10.5],
            [4.0, 14.0],
            [5.0, 25.0],
            [6.0, 30.0],
            [7.0, 35.0],
            [8.0, 40.0],
        ]
    )
    # Class labels for the rows above: the first four are 0, the last four 1.
    Y_train = np.array([0] * 4 + [1] * 4)
    # The sample to classify.
    x = np.array([8, 21])
    # Vote among the 3 nearest training points and show the predicted label.
    label = distance(k=3, X_train=X_train, Y_train=Y_train, x=x)
    print(label)